package cn.lagou.sparksql

import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SparkSession


/**
 * Spark driver that tallies how often each leading field occurs in a CDN access log.
 *
 * Reads `data/cdn.txt`, takes the text before the first run of whitespace on every line
 * (presumably the client IP address -- confirm against the log format), counts the
 * occurrences of each value and prints the `(value, count)` pairs to stdout, highest
 * count first.
 */
object LogAnalyze {
  /**
   * Runs the job on a local Spark master using all available cores.
   *
   * @param args command-line arguments; not used
   */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder()
      .appName(this.getClass.getCanonicalName)
      .master("local[*]")
      .getOrCreate()

    // Release the session even when the job fails (e.g. the input file is missing);
    // otherwise the SparkContext would be left running after the exception.
    try {
      val sc = spark.sparkContext
      sc.setLogLevel("warn")

      // Original note: "compute the number of distinct IPs". What is printed is one
      // (ip, hits) pair per distinct leading field, not a single total.
      val cdnRDD: RDD[String] = sc.textFile("data/cdn.txt")

      // First field of each line; a line that starts with whitespace yields "".
      val ipaddr: RDD[String] = cdnRDD.map(_.split("\\s+")(0))

      val hitsPerIp: RDD[(String, Int)] = ipaddr.map(ip => (ip, 1)).reduceByKey(_ + _)

      // collect() pulls every pair to the driver before printing.
      hitsPerIp
        .sortBy(_._2, ascending = false)
        .collect()
        .foreach(println)
    } finally {
      spark.close()
    }
  }
}
